Functional Enrichment Analysis | Fisher test by module.

Input network

Level 3 of clusters. We keep only the modules with more than 30 nodes for downstream analysis.

# Network clusters 
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/"
fea_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/sn_dlpfc/2nd_pass/FEA/heatmap/"
macro_type = params$cell_type #macro_structure. It can be cell_type, metabolites, region of the brain. 
message(paste0("Cluster: ", macro_type))
## Cluster: ast
modules_file = read.table(paste0(net_dir, macro_type, "/geneBycluster.txt"), header = T)

modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
net_output = modules_file %>% filter(! cluster_lv3 %in% as.integer(too_small)) # the clusters with at least 30 nodes 

createDT(net_output)

Universe tested:

length(net_output$ensembl)
## [1] 16346

Enrichment analysis

GO lists

Pvalue adjusted by Bonferroni

# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl 
path_to_files = "/pastel/resources/gene_lists/mynd_lists/"
geneList_file_names = c("antigen_presentation_GO.txt",
                        "apoptosis_GO.txt",
                        "autophagy_GO.txt",
                        "cell_proliferation_GO.txt",
                        "DNA_metabolic_GO.txt",
                        "DNA_methylation_GO.txt",
                        "DNA_repair_GO.txt",
                        "DNA_replication_GO.txt",
                        "endosomal_transport_GO.txt",
                        "exocytosis_GO.txt",
                        "glucose_metabolism_GO.txt",
                        "IFNb_response_GO.txt",
                        "IFNg_response_GO.txt",
                        "inflammatory_resp_GO.txt",
                        "lipid_metabolism_GO.txt",
                        "lysosome_GO.txt",
                        "macroautophagy_GO.txt",
                        "mitochondria_GO.txt",
                        "neutrophil_activation_GO.txt",
                        "phagocytosis_GO.txt",
                        "protein_ubiquitinization_GO.txt",
                        "proteolysis_GO.txt",
                        "response_cytokine_GO.txt",
                        "ribosome_GO.txt",
                        "RNA_splicing_GO.txt",
                        "translation_GO.txt",
                        "vesicle_med_transp_GO.txt",
                        "viral_response_GO.txt")

genes_universe = net_output$ensembl

cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(path_to_files, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)

p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = T)

pdf(file = paste0(fea_dir, "h_fisher_",macro_type,"_cl3_GO_bonf.pdf"), width = 16, height = 17)
p
dev.off()
## png 
##   2
p

Cell type, m109 and GWAS AD

Pvalue adjusted by Bonferroni.

Cell type markers from Johnson et al, 2022 , plus m109 from Mostafavi et al, 2018. GWAS AD from Bellenguez et al, 2022. Plaque-induced gene list - PIG from Chen et al, 2020.

# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl 
path_to_files = "/pastel/resources/gene_lists/celltype_markers/"
geneList_file_names = c("astrocytes.txt",
                        "microglia.txt",
                        "neuron.txt",
                        "oligodendrocytes.txt",
                        "endothelia.txt",
                        "m109_mostafavi.txt",
                        "GWAS_AD_Bel2022.txt",
                        "PIG_orthologs.txt")

genes_universe = net_output$ensembl

cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(path_to_files, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)

p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = T)

pdf(file = paste0(fea_dir, "h_fisher_",macro_type,"_cl3_cellType_bonf.pdf"), width = 8, height = 15)
p
dev.off()
## png 
##   2
p

Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ComplexHeatmap_2.11.1 circlize_0.4.14       forcats_0.5.1         stringr_1.4.0         purrr_0.3.4           readr_2.1.2           tidyr_1.2.0          
##  [8] tibble_3.1.6          tidyverse_1.3.1       dplyr_1.0.8           performance_0.10.0    lmerTest_3.1-3        lme4_1.1-28           Matrix_1.3-4         
## [15] gplots_3.1.1          broom_0.7.12          ggplot2_3.3.5         ggeasy_0.1.3         
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-153        matrixStats_0.61.0  bitops_1.0-7        fs_1.5.2            lubridate_1.8.0     doParallel_1.0.17   insight_0.18.5      RColorBrewer_1.1-2 
##  [9] httr_1.4.2          numDeriv_2016.8-1.1 bslib_0.3.1         tools_4.1.2         backports_1.4.1     utf8_1.2.2          R6_2.5.1            DT_0.20            
## [17] KernSmooth_2.23-20  vipor_0.4.5         BiocGenerics_0.40.0 DBI_1.1.2           colorspace_2.0-3    GetoptLong_1.0.5    withr_2.4.3         tidyselect_1.1.2   
## [25] compiler_4.1.2      cli_3.2.0           rvest_1.0.2         xml2_1.3.3          sass_0.4.0          caTools_1.18.2      scales_1.1.1        digest_0.6.29      
## [33] minqa_1.2.4         rmarkdown_2.11      pkgconfig_2.0.3     htmltools_0.5.2     highr_0.9           dbplyr_2.1.1        fastmap_1.1.0       htmlwidgets_1.5.4  
## [41] rlang_1.0.1         GlobalOptions_0.1.2 readxl_1.3.1        rstudioapi_0.13     jquerylib_0.1.4     shape_1.4.6         generics_0.1.2      jsonlite_1.7.3     
## [49] crosstalk_1.2.0     gtools_3.9.2        magrittr_2.0.2      S4Vectors_0.32.3    Rcpp_1.0.8          ggbeeswarm_0.6.0    munsell_0.5.0       fansi_1.0.2        
## [57] lifecycle_1.0.1     stringi_1.7.6       yaml_2.3.5          MASS_7.3-54         parallel_4.1.2      crayon_1.5.0        lattice_0.20-45     haven_2.4.3        
## [65] splines_4.1.2       hms_1.1.1           knitr_1.37          pillar_1.7.0        rjson_0.2.21        boot_1.3-28         stats4_4.1.2        codetools_0.2-18   
## [73] reprex_2.0.1        glue_1.6.1          evaluate_0.15       modelr_0.1.8        png_0.1-7           foreach_1.5.2       vctrs_0.3.8         nloptr_2.0.0       
## [81] tzdb_0.2.0          cellranger_1.1.0    gtable_0.3.0        clue_0.3-60         assertthat_0.2.1    xfun_0.29           pheatmap_1.0.12     iterators_1.0.14   
## [89] IRanges_2.28.0      beeswarm_0.4.0      cluster_2.1.2       ellipsis_0.3.2